source("utilities.R")
source("get_tweets.R")
source("munge_tweets.R")
source("semantic_analysis.R")

# fetch the 100 most recent tweets matching the $GOOG cashtag
tweets <- searchTwitter('$GOOG', 100)
# searchTwitter() returns a list of status objects; flatten it to a
# data frame so the tweet text is available as tweets$text
tweets <- twListToDF(tweets)
#tweets <- PreprocessTweets(tweets)
#corpus <- ConstructCorpus(tweets$text, removeTags=TRUE, removeUsers=TRUE, stemming=TRUE)

# write each tweet to its own file in a temporary directory;
# lsa::textmatrix() builds its term-document matrix from a folder of text files
td <- tempfile()
dir.create(td)

print(td)

item_count <- length(tweets$text)
for (i in 1:item_count) {
  tweet_name <- paste("tweet_", i, sep = "")
  write(tweets$text[i], file = paste(td, tweet_name, sep = "/"))
}
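
# optional sanity check: one file per tweet should now be in the folder
stopifnot(length(dir(td)) == item_count)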

# build the term-document matrix, filtering the English stopword list
# shipped with the lsa package
data(stopwords_en)
myMatrix <- textmatrix(td, stopwords = stopwords_en)
# weight raw counts: local log term frequency * global inverse document frequency
myMatrix <- lw_logtf(myMatrix) * gw_idf(myMatrix)
# project into the latent semantic space, letting dimcalc_share() choose the rank
myLSAspace <- lsa(myMatrix, dims = dimcalc_share())
tm <- as.textmatrix(myLSAspace)
# pairwise distances between documents (the columns of the textmatrix)
d <- dist(t(tm))
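
# a minimal alternative view of the same distance matrix, in case hierarchical
# structure is of interest: agglomerative clustering with base R's hclust()
# (an illustrative sketch, uncomment to try)
# hc <- hclust(d, method = "ward.D")
# plot(hc, main = "Hierarchical clustering of tweets in LSA space")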

# classical multidimensional scaling of the document distances down to 2 dimensions
fit <- cmdscale(d, eig = TRUE, k = 2)
#points <- data.frame(x = fit$points[, 1], y = fit$points[, 2])
plot(fit$points[, 1], fit$points[, 2],
     xlab = "MDS dimension 1", ylab = "MDS dimension 2")
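
# optional: label each point so nearby tweets can be inspected; cmdscale()
# carries over the labels of d, which here should be the tweet_<i> file
# names written above (uncomment to try)
# text(fit$points[, 1], fit$points[, 2], labels = rownames(fit$points), pos = 3, cex = 0.6)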

